Histograms are at the bottom of this notebook


In [2]:
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import pandas as pd
import numpy as np
import os

Gen0


In [6]:
# Load the pickled Gen0 genotype and phenotype tables from the experiment directory.
# NOTE(review): pd.read_pickle unpickles arbitrary objects -- only point this at trusted files.
Gen0_genotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen0+genotypes.p')
)
Gen0_phenotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen0+phenotypes.p')
)
# Display the five rows with the highest test_accuracy.
Gen0_phenotypes.sort_values(by='test_accuracy', ascending=False).head(5)


Out[6]:
gene_name misclassed test_accuracy test_loss time train_accuracy train_loss
0 lab3000_n1e1p1b2+Gen0+gene13 {'true_class': [4, 4, 5, 23, 20, 1, 40, 15, 1,... 0.803651 0.883051 47.459044 0.965607 0.120601
0 lab3000_n1e1p1b2+Gen0+gene6 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.780944 1.239399 31.594541 0.962266 0.149377
0 lab3000_n1e1p1b2+Gen0+gene24 {'true_class': [4, 4, 5, 23, 10, 20, 1, 40, 15... 0.752449 1.030251 194.952801 0.827910 0.682031
0 lab3000_n1e1p1b2+Gen0+gene25 {'true_class': [4, 4, 5, 23, 8, 3, 6, 10, 20, ... 0.743544 1.087753 141.736290 0.838550 0.620869
0 lab3000_n1e1p1b2+Gen0+gene11 {'true_class': [10, 4, 4, 5, 23, 8, 6, 20, 1, ... 0.723063 1.521470 26.861886 0.932327 0.261401

Gen1


In [7]:
# Load the pickled Gen1 genotype and phenotype tables from the experiment directory.
# NOTE(review): pd.read_pickle unpickles arbitrary objects -- only point this at trusted files.
Gen1_genotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen1+genotypes.p')
)
Gen1_phenotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen1+phenotypes.p')
)
# Display the five rows with the highest test_accuracy.
Gen1_phenotypes.sort_values(by='test_accuracy', ascending=False).head(5)


Out[7]:
gene_name misclassed test_accuracy test_loss time train_accuracy train_loss
0 lab3000_n1e1p1b2+Gen1+gene15 {'true_class': [10, 4, 5, 23, 20, 1, 40, 15, 1... 0.803651 0.897263 122.271955 0.964122 0.125870
0 lab3000_n1e1p1b2+Gen1+gene8 {'true_class': [4, 4, 5, 23, 20, 1, 40, 15, 1,... 0.803206 0.934038 225.305663 0.961772 0.174198
0 lab3000_n1e1p1b2+Gen1+gene29 {'true_class': [4, 4, 5, 23, 20, 1, 40, 15, 1,... 0.801425 0.937885 52.716231 0.957442 0.177796
0 lab3000_n1e1p1b2+Gen1+gene20 {'true_class': [3, 4, 4, 5, 23, 20, 1, 40, 15,... 0.792520 1.012021 124.393423 0.958184 0.165223
0 lab3000_n1e1p1b2+Gen1+gene10 {'true_class': [4, 4, 5, 23, 20, 1, 40, 15, 1,... 0.787622 1.014112 127.006645 0.956328 0.164417

Gen2


In [9]:
# Load the pickled Gen2 genotype and phenotype tables from the experiment directory.
# NOTE(review): pd.read_pickle unpickles arbitrary objects -- only point this at trusted files.
Gen2_genotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen2+genotypes.p')
)
Gen2_phenotypes = pd.read_pickle(
    os.path.expanduser('~/Experiments/lab3000_n1e1p1b2/lab3000_n1e1p1b2+Gen2+phenotypes.p')
)
# Display the five rows with the highest test_accuracy.
Gen2_phenotypes.sort_values(by='test_accuracy', ascending=False).head(5)


Out[9]:
gene_name misclassed test_accuracy test_loss time train_accuracy train_loss
0 lab3000_n1e1p1b2+Gen2+gene26 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 4,... 0.814782 0.854156 141.992026 0.927378 0.342601
0 lab3000_n1e1p1b2+Gen2+gene2 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.808994 0.834695 28.404398 0.955586 0.233952
0 lab3000_n1e1p1b2+Gen2+gene16 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.808549 0.933977 58.579753 0.960040 0.165074
0 lab3000_n1e1p1b2+Gen2+gene12 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 21... 0.806768 0.912981 41.787451 0.967710 0.090800
0 lab3000_n1e1p1b2+Gen2+gene25 {'true_class': [4, 5, 23, 20, 1, 40, 15, 1, 4,... 0.806322 0.878398 128.751011 0.965236 0.121338

Plots


In [18]:
# Population accuracy histograms: one panel per generation (newest on top),
# each overlaying the test and train accuracy distributions, saved as a PNG.
sns.set(font_scale=1.3)
sns.set_style("whitegrid")

palette = sns.color_palette()
train_color = palette[4]  # the train histogram uses the same colour in every panel

# (legend prefix, phenotype table, colour of the test histogram), top to bottom.
panels = [
    ('Gen2', Gen2_phenotypes, palette[2]),
    ('Gen1', Gen1_phenotypes, palette[1]),
    ('Gen0', Gen0_phenotypes, palette[0]),
]

# One row per generation actually plotted. The earlier (4, 1) grid left an
# empty fourth row; 4 inches per row keeps each panel the size it had there.
fig, axes = plt.subplots(len(panels), 1, figsize=(6, 4 * len(panels)))

for ax, (gen_label, phenotypes, test_color) in zip(axes, panels):
    series_specs = (
        ('test_accuracy', 'test', test_color),
        ('train_accuracy', 'train', train_color),
    )
    for column, split, color in series_specs:
        # Direct matplotlib equivalent of the deprecated
        # sns.distplot(..., kde=False, norm_hist=True): a density-normalised
        # histogram drawn with distplot's default alpha of 0.4.
        ax.hist(
            phenotypes[column].dropna(),
            bins=50,
            density=True,
            alpha=0.4,
            color=color,
            label='{}_{}'.format(gen_label, split),
        )
    ax.set_xlim(0, 1.0)
    ax.set_xlabel("Accuracy")
    # The histograms are normalised, so the y-axis is a density, not a count.
    ax.set_ylabel("Density")
    ax.legend(loc='upper right', fontsize=16)

# Stop axis labels of neighbouring panels overlapping or being clipped on save.
fig.tight_layout()
fig.savefig('Gen0_Gen1_Gen2_population_accuracies.png', bbox_inches='tight');



In [ ]: